In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
life_expectancy_df = pd.read_csv('Life_Expectancy_Data.csv')
In [4]:
life_expectancy_df
Out[4]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns

In [5]:
sns.heatmap(life_expectancy_df.isnull(), yticklabels = False, cbar = False, cmap="Blues")
Out[5]:
<AxesSubplot:>
In [6]:
life_expectancy_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2938 entries, 0 to 2937
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Year                             2938 non-null   int64  
 1   Status                           2938 non-null   object 
 2   Life expectancy                  2928 non-null   float64
 3   Adult Mortality                  2928 non-null   float64
 4   infant deaths                    2938 non-null   int64  
 5   Alcohol                          2744 non-null   float64
 6   percentage expenditure           2938 non-null   float64
 7   Hepatitis B                      2385 non-null   float64
 8   Measles                          2938 non-null   int64  
 9    BMI                             2904 non-null   float64
 10  under-five deaths                2938 non-null   int64  
 11  Polio                            2919 non-null   float64
 12  Total expenditure                2712 non-null   float64
 13  Diphtheria                       2919 non-null   float64
 14   HIV/AIDS                        2938 non-null   float64
 15  GDP                              2490 non-null   float64
 16  Population                       2286 non-null   float64
 17   thinness  1-19 years            2904 non-null   float64
 18   thinness 5-9 years              2904 non-null   float64
 19  Income composition of resources  2771 non-null   float64
 20  Schooling                        2775 non-null   float64
dtypes: float64(16), int64(4), object(1)
memory usage: 482.1+ KB
In [7]:
life_expectancy_df.describe()
Out[7]:
Year Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI under-five deaths Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
count 2938.000000 2928.000000 2928.000000 2938.000000 2744.000000 2938.000000 2385.000000 2938.000000 2904.000000 2938.000000 2919.000000 2712.00000 2919.000000 2938.000000 2490.000000 2.286000e+03 2904.000000 2904.000000 2771.000000 2775.000000
mean 2007.518720 69.224932 164.796448 30.303948 4.602861 738.251295 80.940461 2419.592240 38.321247 42.035739 82.550188 5.93819 82.324084 1.742103 7483.158469 1.275338e+07 4.839704 4.870317 0.627551 11.992793
std 4.613841 9.523867 124.292079 117.926501 4.052413 1987.914858 25.070016 11467.272489 20.044034 160.445548 23.428046 2.49832 23.716912 5.077785 14270.169342 6.101210e+07 4.420195 4.508882 0.210904 3.358920
min 2000.000000 36.300000 1.000000 0.000000 0.010000 0.000000 1.000000 0.000000 1.000000 0.000000 3.000000 0.37000 2.000000 0.100000 1.681350 3.400000e+01 0.100000 0.100000 0.000000 0.000000
25% 2004.000000 63.100000 74.000000 0.000000 0.877500 4.685343 77.000000 0.000000 19.300000 0.000000 78.000000 4.26000 78.000000 0.100000 463.935626 1.957932e+05 1.600000 1.500000 0.493000 10.100000
50% 2008.000000 72.100000 144.000000 3.000000 3.755000 64.912906 92.000000 17.000000 43.500000 4.000000 93.000000 5.75500 93.000000 0.100000 1766.947595 1.386542e+06 3.300000 3.300000 0.677000 12.300000
75% 2012.000000 75.700000 228.000000 22.000000 7.702500 441.534144 97.000000 360.250000 56.200000 28.000000 97.000000 7.49250 97.000000 0.800000 5910.806335 7.420359e+06 7.200000 7.200000 0.779000 14.300000
max 2015.000000 89.000000 723.000000 1800.000000 17.870000 19479.911610 99.000000 212183.000000 87.300000 2500.000000 99.000000 17.60000 99.000000 50.600000 119172.741800 1.293859e+09 27.700000 28.600000 0.948000 20.700000
In [42]:
# Plot the histogram
life_expectancy_df.hist(bins = 30, figsize = (20, 20), color = 'b');
In [9]:
plt.figure(figsize = (20,20))
sns.pairplot(life_expectancy_df)
Out[9]:
<seaborn.axisgrid.PairGrid at 0x2b699880220>
<Figure size 1440x1440 with 0 Axes>
In [10]:
sns.scatterplot(data = life_expectancy_df, x = 'Schooling', y = 'Life expectancy ')
Out[10]:
<AxesSubplot:xlabel='Schooling', ylabel='Life expectancy '>
In [11]:
sns.scatterplot(data = life_expectancy_df, x = 'GDP', y = 'Life expectancy ')
Out[11]:
<AxesSubplot:xlabel='GDP', ylabel='Life expectancy '>
In [12]:
sns.scatterplot(data = life_expectancy_df, x = 'Income composition of resources', y = 'Life expectancy ')
Out[12]:
<AxesSubplot:xlabel='Income composition of resources', ylabel='Life expectancy '>
In [45]:
sns.scatterplot(data = life_expectancy_df, x = 'Population', y = 'Schooling')
Out[45]:
<AxesSubplot:xlabel='Population', ylabel='Schooling'>
In [13]:
sns.scatterplot(data = life_expectancy_df, x = ' HIV/AIDS', y = 'Life expectancy ')
Out[13]:
<AxesSubplot:xlabel=' HIV/AIDS', ylabel='Life expectancy '>
In [14]:
sns.scatterplot(data = life_expectancy_df, x = 'Income composition of resources', y = 'Life expectancy ')
Out[14]:
<AxesSubplot:xlabel='Income composition of resources', ylabel='Life expectancy '>
In [15]:
plt.figure(figsize = (20,20))
corr_matrix = life_expectancy_df.corr()
sns.heatmap(corr_matrix, annot = True)
plt.show()
In [16]:
life_expectancy_df
Out[16]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns

In [17]:
# Checking the unique values in country to consider it as a categorical variable
life_expectancy_df['Status'].nunique()
Out[17]:
2
In [18]:
life_expectancy_df = pd.get_dummies(life_expectancy_df, columns = ['Status'])
In [19]:
life_expectancy_df
Out[19]:
Year Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI under-five deaths ... Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling Status_Developed Status_Developing
0 2015 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 83 ... 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1 0 1
1 2014 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 86 ... 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0 0 1
2 2013 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 89 ... 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9 0 1
3 2012 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 93 ... 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8 0 1
4 2011 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 97 ... 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 42 ... 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2 0 1
2934 2003 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 41 ... 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5 0 1
2935 2002 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 40 ... 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0 0 1
2936 2001 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 39 ... 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8 0 1
2937 2000 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 39 ... 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8 0 1

2938 rows × 22 columns

In [20]:
life_expectancy_df.isnull().sum()[np.where(life_expectancy_df.isnull().sum() != 0)[0]]
Out[20]:
Life expectancy                     10
Adult Mortality                     10
Alcohol                            194
Hepatitis B                        553
 BMI                                34
Polio                               19
Total expenditure                  226
Diphtheria                          19
GDP                                448
Population                         652
 thinness  1-19 years               34
 thinness 5-9 years                 34
Income composition of resources    167
Schooling                          163
dtype: int64
In [21]:
life_expectancy_df = life_expectancy_df.apply(lambda x: x.fillna(x.mean()),axis=0)
In [22]:
life_expectancy_df.isnull().sum()[np.where(life_expectancy_df.isnull().sum() != 0)[0]]
Out[22]:
Series([], dtype: int64)
In [23]:
X = life_expectancy_df.drop(columns = ['Life expectancy '])
y = life_expectancy_df[['Life expectancy ']]
In [24]:
X
Out[24]:
Year Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI under-five deaths Polio ... Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling Status_Developed Status_Developing
0 2015 263.0 62 0.01 71.279624 65.0 1154 19.1 83 6.0 ... 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1 0 1
1 2014 271.0 64 0.01 73.523582 62.0 492 18.6 86 58.0 ... 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0 0 1
2 2013 268.0 66 0.01 73.219243 64.0 430 18.1 89 62.0 ... 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9 0 1
3 2012 272.0 69 0.01 78.184215 67.0 2787 17.6 93 67.0 ... 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8 0 1
4 2011 275.0 71 0.01 7.097109 68.0 3013 17.2 97 68.0 ... 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 723.0 27 4.36 0.000000 68.0 31 27.1 42 67.0 ... 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2 0 1
2934 2003 715.0 26 4.06 0.000000 7.0 998 26.7 41 7.0 ... 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5 0 1
2935 2002 73.0 25 4.43 0.000000 73.0 304 26.3 40 73.0 ... 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0 0 1
2936 2001 686.0 25 1.72 0.000000 76.0 529 25.9 39 76.0 ... 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8 0 1
2937 2000 665.0 24 1.68 0.000000 79.0 1483 25.5 39 78.0 ... 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8 0 1

2938 rows × 21 columns

In [25]:
y
Out[25]:
Life expectancy
0 65.0
1 59.9
2 59.9
3 59.5
4 59.2
... ...
2933 44.3
2934 44.5
2935 44.8
2936 45.3
2937 46.0

2938 rows × 1 columns

In [26]:
X.shape
Out[26]:
(2938, 21)
In [27]:
y.shape
Out[27]:
(2938, 1)
In [28]:
# Convert the data type to float32

X = np.array(X).astype('float32')
y = np.array(y).astype('float32')
In [29]:
# Only take the numerical variables and scale them
X 
Out[29]:
array([[2.015e+03, 2.630e+02, 6.200e+01, ..., 1.010e+01, 0.000e+00,
        1.000e+00],
       [2.014e+03, 2.710e+02, 6.400e+01, ..., 1.000e+01, 0.000e+00,
        1.000e+00],
       [2.013e+03, 2.680e+02, 6.600e+01, ..., 9.900e+00, 0.000e+00,
        1.000e+00],
       ...,
       [2.002e+03, 7.300e+01, 2.500e+01, ..., 1.000e+01, 0.000e+00,
        1.000e+00],
       [2.001e+03, 6.860e+02, 2.500e+01, ..., 9.800e+00, 0.000e+00,
        1.000e+00],
       [2.000e+03, 6.650e+02, 2.400e+01, ..., 9.800e+00, 0.000e+00,
        1.000e+00]], dtype=float32)
In [30]:
# split the data into test and train sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
In [31]:
# Scale the data
from sklearn.preprocessing import StandardScaler

scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)
In [32]:
# using linear regression model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score

regresssion_model_sklearn = LinearRegression(fit_intercept = True)
regresssion_model_sklearn.fit(X_train, y_train)
Out[32]:
LinearRegression()
In [33]:
regresssion_model_sklearn_accuracy = regresssion_model_sklearn.score(X_test, y_test)
regresssion_model_sklearn_accuracy
Out[33]:
0.8340892521270633
In [34]:
print('Linear Model Coefficient (m): ', regresssion_model_sklearn.coef_)
print('Linear Model Coefficient (b): ', regresssion_model_sklearn.intercept_)
Linear Model Coefficient (m):  [[-2.61061708e-03 -2.48314038e-01  1.30889893e+00  3.24076451e-02
   1.29710045e-02 -2.60179937e-02 -2.49288455e-02  9.39102769e-02
  -1.33250380e+00  6.82824329e-02  2.02552099e-02  9.44694877e-02
  -2.55512834e-01  5.85425347e-02 -1.13813148e-03 -3.10914405e-02
  -1.38065196e-03  1.18575670e-01  2.33237758e-01  2.63141207e-02
  -2.63130777e-02]]
Linear Model Coefficient (b):  [-2.6998622e-08]
In [35]:
# Make prediction

y_predict = regresssion_model_sklearn.predict(X_test)
In [36]:
y_predict
Out[36]:
array([[ 5.52710831e-01],
       [-2.16832328e+00],
       [ 1.67019397e-01],
       [-2.60561049e-01],
       [ 6.41681075e-01],
       [ 9.02905345e-01],
       [-1.15051806e+00],
       [ 2.54885584e-01],
       [-6.32353783e-01],
       [-2.10694575e+00],
       [ 8.92400503e-01],
       [-3.87898594e-01],
       [ 4.61621553e-01],
       [ 3.12996089e-01],
       [-2.75252271e+00],
       [ 4.58067030e-01],
       [ 3.05019468e-01],
       [ 1.12625110e+00],
       [ 7.49843001e-01],
       [-1.09040451e+00],
       [ 6.60644352e-01],
       [ 1.71031833e-01],
       [-3.84888314e-02],
       [-8.21287632e-02],
       [ 1.01489449e+00],
       [ 3.93126875e-01],
       [-1.03998697e+00],
       [ 4.70234126e-01],
       [ 6.57431364e-01],
       [ 2.32481062e-01],
       [-1.52942050e+00],
       [-5.84325194e-01],
       [-1.73759401e+00],
       [ 2.12526619e-01],
       [-1.20008457e+00],
       [-5.68039119e-01],
       [-1.86422586e-01],
       [-2.44574212e-02],
       [-3.50263268e-01],
       [ 8.95781815e-02],
       [ 4.71137226e-01],
       [-9.68977809e-01],
       [ 6.29414976e-01],
       [ 1.71904892e-01],
       [ 1.27129161e+00],
       [-1.93901025e-02],
       [-5.66162646e-01],
       [-1.32934423e-02],
       [-1.93406105e+00],
       [-2.66107631e+00],
       [ 9.77280855e-01],
       [-2.06961229e-01],
       [ 6.55054390e-01],
       [ 5.82997799e-01],
       [-1.65501207e-01],
       [ 1.43740594e+00],
       [-2.34375715e+00],
       [-1.17040658e+00],
       [-1.31018746e+00],
       [ 3.55744928e-01],
       [ 6.49780273e-01],
       [ 5.06207407e-01],
       [-5.09586275e-01],
       [ 8.60826612e-01],
       [ 5.91302872e-01],
       [-4.77294289e-02],
       [-2.68146825e+00],
       [ 5.24610162e-01],
       [ 4.77833807e-01],
       [ 6.54073179e-01],
       [ 8.04929674e-01],
       [-1.73394442e+00],
       [ 1.07581782e+00],
       [-8.08958292e-01],
       [ 2.67772347e-01],
       [ 9.14890110e-01],
       [-7.71501303e-01],
       [ 1.49547547e-01],
       [-4.25266894e-03],
       [ 2.32207656e-01],
       [-9.36321557e-01],
       [ 3.47960321e-03],
       [-2.11641264e+00],
       [ 3.77019972e-01],
       [-8.16926777e-01],
       [ 1.24418724e+00],
       [-1.72595656e+00],
       [-6.13079548e-01],
       [-5.74904859e-01],
       [-5.00015281e-02],
       [ 3.92089523e-02],
       [ 4.12350595e-01],
       [ 9.20182765e-02],
       [ 2.11439073e-01],
       [ 6.10208094e-01],
       [ 3.12711537e-01],
       [ 6.84671700e-01],
       [ 7.47922838e-01],
       [ 2.87155032e-01],
       [-1.30575106e-01],
       [-1.40194976e+00],
       [ 1.07779717e+00],
       [-5.79488814e-01],
       [ 7.57264793e-01],
       [ 5.97016573e-01],
       [ 9.57629263e-01],
       [-1.38976538e+00],
       [-1.06413066e+00],
       [ 9.54084814e-01],
       [ 1.26774669e-01],
       [ 3.49221565e-02],
       [ 5.22761166e-01],
       [ 1.17534131e-01],
       [-1.07028754e-02],
       [ 4.15537596e-01],
       [-9.63635623e-01],
       [-1.51113600e-01],
       [-2.50926875e-02],
       [ 3.06643546e-01],
       [-3.09628509e-02],
       [ 1.47919683e-02],
       [ 1.13012493e+00],
       [ 7.25224614e-01],
       [-1.26446426e+00],
       [-1.59179842e+00],
       [-3.06014329e-01],
       [ 1.22365415e+00],
       [ 5.78518629e-01],
       [-1.00798249e-01],
       [ 7.60962963e-01],
       [ 2.37028420e-01],
       [ 8.12666357e-01],
       [-1.79120839e-01],
       [ 1.27772665e+00],
       [-2.45546317e+00],
       [ 6.60988927e-01],
       [ 3.86145175e-01],
       [-7.20776260e-01],
       [ 7.69019365e-01],
       [ 6.39683366e-01],
       [-9.15374458e-01],
       [ 2.61953861e-01],
       [ 1.26638865e+00],
       [ 4.21032578e-01],
       [ 4.30117160e-01],
       [ 1.48671806e-01],
       [-8.91006649e-01],
       [ 4.97806698e-01],
       [ 2.05942899e-01],
       [-1.02481306e-01],
       [ 8.83263052e-02],
       [-9.72267807e-01],
       [ 7.84177303e-01],
       [ 5.24665713e-01],
       [-6.11437500e-01],
       [-2.89502740e+00],
       [ 7.83232391e-01],
       [-6.30766630e-01],
       [-3.76500463e+00],
       [ 1.02772188e+00],
       [-1.84252656e+00],
       [-1.57603037e+00],
       [ 9.14078474e-01],
       [-8.02739322e-01],
       [ 1.17295370e-01],
       [ 1.04476988e+00],
       [ 4.05269384e-01],
       [-1.22281277e+00],
       [ 3.78242970e-01],
       [ 2.26409748e-01],
       [-9.75948393e-01],
       [-1.48905009e-01],
       [ 9.07619715e-01],
       [ 3.68190795e-01],
       [-1.83150880e-02],
       [-7.16013968e-01],
       [-1.65445998e-01],
       [-7.37916946e-01],
       [ 9.09516633e-01],
       [-8.74774754e-01],
       [-1.34824872e+00],
       [ 6.81117415e-01],
       [ 2.51044720e-01],
       [-2.00158864e-01],
       [ 7.85416722e-01],
       [ 8.66891444e-01],
       [-1.40278518e+00],
       [ 2.81088799e-01],
       [-4.56558675e-01],
       [ 7.10236430e-02],
       [ 4.75339562e-01],
       [-1.16799903e+00],
       [ 6.48208380e-01],
       [ 4.93626922e-01],
       [-8.12274635e-01],
       [-2.25451663e-01],
       [ 1.14626014e+00],
       [ 1.17455757e+00],
       [ 5.95113970e-02],
       [ 4.15205628e-01],
       [ 4.15410906e-01],
       [-1.70823634e+00],
       [ 4.68985468e-01],
       [ 1.54942662e-01],
       [ 4.92507726e-01],
       [-1.65442741e+00],
       [-9.63012278e-01],
       [-2.17627907e+00],
       [-1.40059757e+00],
       [ 7.88518786e-02],
       [-2.85863131e-01],
       [-1.09748232e+00],
       [ 1.89638168e-01],
       [ 1.07717133e+00],
       [-3.52683330e+00],
       [-8.49122465e-01],
       [ 1.18430519e+00],
       [ 6.69337928e-01],
       [ 2.31849290e-02],
       [ 7.77192652e-01],
       [-1.11681783e+00],
       [ 9.24960673e-01],
       [-8.50136936e-01],
       [ 4.67970103e-01],
       [ 1.20765483e+00],
       [ 1.48111999e+00],
       [-8.41505378e-02],
       [ 6.36683226e-01],
       [ 1.45922542e-01],
       [ 8.47810745e-01],
       [-8.98610055e-02],
       [ 2.85117179e-01],
       [ 1.28646851e-01],
       [-9.05312777e-01],
       [ 1.30433738e+00],
       [ 8.32333624e-01],
       [ 9.10141945e-01],
       [-1.95402175e-01],
       [-1.06986034e+00],
       [ 1.10821402e+00],
       [-1.39241308e-01],
       [-9.89909530e-01],
       [ 4.89472039e-02],
       [-8.52360368e-01],
       [-1.43903768e+00],
       [ 5.79346418e-01],
       [-1.01157117e+00],
       [ 9.05100226e-01],
       [-5.16619347e-02],
       [ 1.06320441e+00],
       [ 5.11557497e-02],
       [-9.22539949e-01],
       [-2.03765082e+00],
       [ 8.44934464e-01],
       [ 3.15759063e-01],
       [-4.06293988e-01],
       [-1.04356468e+00],
       [-2.78512120e+00],
       [-1.85211599e+00],
       [ 4.51204628e-01],
       [ 3.96401733e-01],
       [ 1.11832297e+00],
       [ 9.92170632e-01],
       [-6.61871016e-01],
       [ 1.10089862e+00],
       [ 2.65899934e-02],
       [ 6.96750939e-01],
       [ 3.60055603e-02],
       [ 4.42502826e-01],
       [ 1.32942498e+00],
       [ 7.70188391e-01],
       [ 1.51264572e+00],
       [-8.45213473e-01],
       [ 2.84633011e-01],
       [-8.56528878e-02],
       [-1.43650496e+00],
       [ 3.03723395e-01],
       [ 5.22436142e-01],
       [ 1.22615063e+00],
       [ 9.09931004e-01],
       [ 1.31563878e+00],
       [-1.16991671e-03],
       [-9.98629868e-01],
       [ 1.26976180e+00],
       [-1.44195616e+00],
       [ 2.98613280e-01],
       [-1.20759571e+00],
       [ 9.75366056e-01],
       [ 5.55955827e-01],
       [-1.57154047e+00],
       [ 9.10902202e-01],
       [-1.36684406e+00],
       [ 2.91386276e-01],
       [-1.53479159e+00],
       [-2.16175056e+00],
       [ 1.36270612e-01],
       [ 4.71654087e-01],
       [ 7.37280965e-01],
       [ 7.30745971e-01],
       [ 1.11952341e+00],
       [-1.84391057e+00],
       [ 5.42985797e-01],
       [-1.24474466e-01],
       [-2.90602416e-01],
       [ 1.72621608e-01],
       [-1.36526795e-02],
       [ 6.73085868e-01],
       [-2.06324434e+00],
       [ 4.71875191e-01],
       [ 4.62054253e-01],
       [ 1.08057201e+00],
       [-1.40729034e+00],
       [-1.09038782e+00],
       [ 1.17768094e-01],
       [-4.19126116e-02],
       [ 1.09365022e+00],
       [ 9.87251997e-02],
       [-1.74598300e+00],
       [-1.46835864e+00],
       [ 5.68061650e-01],
       [ 1.11516511e+00],
       [-1.08592176e+00],
       [-9.20174122e-01],
       [-3.20152044e+00],
       [-1.53171623e+00],
       [-1.12078726e+00],
       [-3.76025379e-01],
       [-5.68312466e-01],
       [-1.62122548e-01],
       [-1.22766709e+00],
       [-1.12846345e-01],
       [ 6.23853028e-01],
       [ 1.02843094e+00],
       [ 1.07945263e-01],
       [ 1.70268431e-01],
       [ 1.25617772e-01],
       [ 7.11729288e-01],
       [-3.25015783e+00],
       [ 7.38437474e-02],
       [-2.45575339e-01],
       [ 1.16022122e+00],
       [-3.59005123e-01],
       [ 1.04464805e+00],
       [-5.55668592e-01],
       [ 1.09465992e+00],
       [-8.17058623e-01],
       [ 1.39014915e-01],
       [-8.93750608e-01],
       [-6.13648593e-01],
       [ 3.55499715e-01],
       [-1.82588071e-01],
       [ 3.20608586e-01],
       [-1.14689708e+00],
       [-6.56674266e-01],
       [ 1.29371798e+00],
       [ 7.81974614e-01],
       [-4.89241362e-01],
       [-1.23924315e+00],
       [-3.83279711e-01],
       [-6.76795781e-01],
       [ 1.59546471e+00],
       [-8.35786402e-01],
       [-6.45230234e-01],
       [-1.17509282e+00],
       [ 7.80216396e-01],
       [ 1.74909979e-01],
       [ 6.88567400e-01],
       [ 8.38780850e-02],
       [ 1.59270063e-01],
       [-6.10351324e-01],
       [ 1.00286996e+00],
       [ 1.17401338e+00],
       [ 4.32071596e-01],
       [-1.63599527e+00],
       [ 8.12521040e-01],
       [ 7.92818785e-01],
       [ 6.93403184e-02],
       [-1.08170652e+00],
       [-5.03658831e-01],
       [-4.73706909e-02],
       [-4.23268646e-01],
       [ 3.12597491e-02],
       [ 7.27069438e-01],
       [ 9.52139199e-02],
       [ 2.29732782e-01],
       [ 9.33728456e-01],
       [ 1.21199763e+00],
       [ 1.35553539e+00],
       [ 4.55510110e-01],
       [ 7.30035126e-01],
       [ 5.90461791e-01],
       [-5.84788978e-01],
       [ 9.15414095e-02],
       [ 1.13066244e+00],
       [-1.45133764e-01],
       [ 9.65184987e-01],
       [ 8.58497202e-01],
       [ 4.20144677e-01],
       [ 3.72954994e-01],
       [-9.77192700e-01],
       [-8.36410582e-01],
       [ 8.20704937e-01],
       [-7.74810493e-01],
       [ 9.25964117e-01],
       [ 5.09230912e-01],
       [ 1.49525791e-01],
       [ 6.64378464e-01],
       [ 1.55767095e+00],
       [ 1.16064572e+00],
       [ 3.18306834e-01],
       [ 4.40745264e-01],
       [ 1.40767109e+00],
       [-6.13686979e-01],
       [ 2.03147084e-01],
       [ 3.54101777e-01],
       [-1.50797379e+00],
       [ 3.18131596e-01],
       [-1.37317801e+00],
       [-1.29619809e-02],
       [ 5.98721921e-01],
       [-5.76220751e-01],
       [-1.16058624e+00],
       [ 1.26369905e+00],
       [ 5.70825338e-01],
       [ 7.75644004e-01],
       [ 7.99267709e-01],
       [ 4.39230651e-01],
       [ 3.67545694e-01],
       [ 1.12801111e+00],
       [ 8.11241329e-01],
       [ 1.63147950e+00],
       [-1.67399645e+00],
       [ 4.60181266e-01],
       [-2.49360538e+00],
       [-3.21395956e-02],
       [ 1.04973352e+00],
       [-3.01008511e+00],
       [-3.31538647e-01],
       [ 9.03196037e-01],
       [-1.40111303e+00],
       [ 7.18792498e-01],
       [-1.54014075e+00],
       [-1.30988419e-01],
       [ 2.09673703e-01],
       [-8.79785240e-01],
       [-3.52250785e-01],
       [-1.62849927e+00],
       [ 8.13203275e-01],
       [ 4.73319203e-01],
       [ 5.47543526e-01],
       [ 1.43717563e+00],
       [-6.57539010e-01],
       [ 1.80131523e-03],
       [-1.02453232e+00],
       [-6.03744350e-02],
       [-5.91808915e-01],
       [ 6.97400570e-01],
       [-6.51037455e-01],
       [ 6.21403992e-01],
       [ 6.16609871e-01],
       [ 8.89318705e-01],
       [ 4.62804645e-01],
       [ 1.91137537e-01],
       [-4.52241033e-01],
       [-3.87682676e+00],
       [-5.14699399e-01],
       [ 4.73519824e-02],
       [-4.67809401e-02],
       [-9.39416349e-01],
       [ 1.26614523e+00],
       [ 5.33868432e-01],
       [ 8.69098723e-01],
       [ 5.28575540e-01],
       [ 4.38616961e-01],
       [ 7.70562053e-01],
       [-1.46373487e+00],
       [ 4.50699151e-01],
       [ 1.96949631e-01],
       [ 6.12041712e-01],
       [ 7.64083028e-01],
       [-3.52741361e+00],
       [ 1.45277262e-01],
       [ 1.65612787e-01],
       [ 4.38533425e-01],
       [-2.48667645e+00],
       [ 1.92773968e-01],
       [ 4.88228410e-01],
       [ 9.40659583e-01],
       [-1.02997154e-01],
       [ 4.82853390e-02],
       [-1.28369749e+00],
       [ 4.70904440e-01],
       [-9.93004322e-01],
       [ 1.41880476e+00],
       [-3.03581744e-01],
       [ 6.51034653e-01],
       [ 7.43983090e-01],
       [ 1.19432032e+00],
       [ 6.92491651e-01],
       [ 8.07432175e-01],
       [-9.82738376e-01],
       [ 9.22874391e-01],
       [ 6.67386532e-01],
       [ 1.28300339e-01],
       [ 1.52864009e-01],
       [ 4.68493134e-01],
       [-2.14909577e+00],
       [-4.78457987e-01],
       [ 3.02116156e-01],
       [-8.83207917e-01],
       [ 6.83480680e-01],
       [ 2.82940604e-02],
       [-2.89943755e-01],
       [ 7.12881625e-01],
       [ 1.29372728e+00],
       [-1.45629495e-01],
       [ 9.66929495e-02],
       [-2.03770113e+00],
       [ 4.70364362e-01],
       [ 2.04002596e-02],
       [ 1.28312588e+00],
       [ 2.34469458e-01],
       [-1.00727707e-01],
       [ 3.97281684e-02],
       [-1.65420103e+00],
       [ 1.51657537e-01],
       [ 1.04991764e-01],
       [ 6.52542591e-01],
       [-6.94869339e-01],
       [ 7.68318832e-01],
       [-2.69931525e-01],
       [-2.84774117e-02],
       [ 2.28126213e-01],
       [ 9.32809949e-01],
       [-2.49768585e-01],
       [ 1.96041882e-01],
       [ 6.80984080e-01],
       [ 2.54315346e-01],
       [-1.85379517e+00],
       [-8.01015198e-01],
       [ 4.35611635e-01],
       [ 1.02972734e+00],
       [-3.54356766e-01],
       [-1.18666327e+00],
       [ 4.95746523e-01],
       [-9.84485805e-01],
       [ 3.38030964e-01],
       [-1.34528124e+00],
       [ 6.79492831e-01],
       [ 1.31230009e+00],
       [ 2.68432260e-01],
       [ 3.62544395e-02],
       [ 1.18392631e-01],
       [ 7.56380737e-01],
       [ 8.19890374e-06],
       [ 2.67845720e-01],
       [-1.88982773e+00],
       [-1.96749878e+00],
       [ 3.13791670e-02],
       [-6.87573671e-01],
       [-1.71962702e+00],
       [-3.93082786e+00],
       [ 1.52391875e+00],
       [-2.10408717e-01],
       [ 5.26753783e-01],
       [ 1.66754222e+00],
       [-2.53525615e-01],
       [-7.00067699e-01],
       [ 7.59080768e-01],
       [ 3.82731289e-01],
       [ 6.41299069e-01],
       [-2.95760989e+00],
       [ 6.11043513e-01],
       [ 6.03483140e-01],
       [ 3.93115103e-01],
       [ 1.07154775e+00],
       [ 4.79877323e-01],
       [ 1.27206719e+00],
       [ 4.12568241e-01],
       [ 5.83164155e-01],
       [ 1.12685755e-01],
       [-9.65158761e-01],
       [-8.67547333e-01],
       [ 3.75361890e-01],
       [ 4.01545793e-01],
       [ 5.47808230e-01],
       [ 7.25134730e-01],
       [-5.31004846e-01]], dtype=float32)
In [43]:
# Plot the scaled result

plt.plot(y_test, y_predict, "^", color = 'b')
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
Out[43]:
Text(0, 0.5, 'True Values')
In [38]:
y_predict_orig = scaler_y.inverse_transform(y_predict)
y_test_orig = scaler_y.inverse_transform(y_test)
In [44]:
# Plot the original values

plt.plot(y_test_orig, y_predict_orig, "^", color = 'b')
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
Out[44]:
Text(0, 0.5, 'True Values')
In [40]:
# Plot the KPIs

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from math import sqrt

k = X_test.shape[1]
n = len(X_test)
RMSE = float(format(np.sqrt(mean_squared_error(y_test_orig, y_predict_orig)),'.3f'))
MSE = mean_squared_error(y_test_orig, y_predict_orig)
MAE = mean_absolute_error(y_test_orig, y_predict_orig)
r2 = r2_score(y_test_orig, y_predict_orig)
adj_r2 = 1-(1-r2)*(n-1)/(n-k-1)

print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', r2, '\nAdjusted R2 =', adj_r2) 
RMSE = 3.993 
MSE = 15.946937 
MAE = 2.9852057 
R2 = 0.8340892506152433 
Adjusted R2 = 0.8279335514331234
In [ ]:
!jupyter nbconvert  .ipynb --to html